#!/usr/bin/env groovy
package scripts

/**
 * This script fetches the PubMed metadata for an article and injects it into a PDF file.
 */
@Grapes([
	@Grab(group='org.apache.ivy', module='ivy', version='2.2.0'),
	@Grab(group='org.apache.pdfbox', module='pdfbox', version='1.7.1'),
	@Grab(group='org.biogroovy', module='biogroovy',version='1.1')
])
import org.apache.pdfbox.pdmodel.common.*;
import org.apache.pdfbox.pdmodel.*;
import org.biogroovy.eutils.EUtilsURLFactory
import org.biogroovy.eutils.PubMedArticleReader;
import org.biogroovy.io.PDFArticleSerializer;
import org.biogroovy.models.*;

// Command-line entry point.
//
// Parses the -f (input PDF) and -p (PubMed ID) options, downloads the PubMed
// record for that ID as XML through the EUtils EFETCH URL, and hands the parsed
// Article to PDFArticleSerializer.writeMetadata together with the PDF path.
//
// The usage text is kept in one place so the CliBuilder banner and the help
// option description cannot drift apart. It lists only the options that are
// actually declared below (there is no -o option).
String usageText = 'PubMedPDF.groovy -f <inputFile> -p <pmid>'

def cli = new CliBuilder(usage: usageText);

cli.with {
	h( longOpt: 'help', usageText)
	f( longOpt: 'inputFile', args:1, argName:'file','The PDF to be processed')
	p( longOpt: 'pmid', args:1, 'PubMed ID for file')
}


def options = cli.parse(args)

// parse() yields a falsy result when the arguments could not be parsed.
if (!options){
	return;
}

if (options.h){
	cli.usage();
	return;
}

// An option that was not supplied evaluates to false, so calling trim() on it
// directly would fail; normalise both values to (possibly empty) strings first.
String inputFile = options.f ? options.f.toString().trim() : ''
String pmid = options.p ? options.p.toString().trim() : ''

if (!inputFile || !pmid){
	System.err.println 'Both -f <inputFile> and -p <pmid> are required.'
	cli.usage();
	return;
}

PubMedArticleReader reader = new PubMedArticleReader();

String url = EUtilsURLFactory.getURL( EUtilsURLFactory.EFETCH, [db:EUtilsURLFactory.DB_PUBMED, id:pmid, retmode:'xml']);

// withStream closes the stream even when reading throws.
Article article = new URL(url).openStream().withStream { InputStream xml ->
	reader.read(xml)
}

PDFArticleSerializer serializer = new PDFArticleSerializer();

serializer.writeMetadata(inputFile, article);

println "Document updated"
